#1 Read the dataset

# Folder that holds the unzipped activity data. The path is still specific to
# the author's machine; adjust `data_dir` when running elsewhere.
# The file path is built explicitly instead of calling setwd(), so reading the
# data no longer changes the working directory of the R session.
data_dir <- "C:/Users/imano/Downloads/repdata_data_activity"
activity <- read.csv(file.path(data_dir, "activity.csv"))

#2 Histogram: I made a histogram of the total number of steps taken each day

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
# Keep only the rows where a step count was recorded. Note that `steps` is the
# name of this filtered data frame as well as of its step-count column.
steps <- activity[complete.cases(activity$steps), ]

# One row per date with the total number of steps on that date (column `sum`).
# Inside summarise(), `steps` refers to the column of the per-date piece.
newdata <- plyr::ddply(steps, "date", plyr::summarise, sum = sum(steps))

# Distribution of the daily totals.
hist(
    newdata$sum,
    breaks = 53,
    col = "blue",
    xlab = "Steps per day",
    main = "Histogram of the Total Number of Steps Taken per Day"
)

#3 Mean and median number of steps taken each day

I calculate and report the mean and median of the total number of steps taken per day

# Mean and median of the daily step totals held in newdata$sum.
# NOTE(review): the variable `median` has the same name as the function it is
# computed with; later calls to median() still resolve to the function.
media <- mean(newdata[["sum"]])
median <- stats::median(newdata[["sum"]])

# Show both values together as a 1 x 1 contingency table.
table(media, median)
##                   median
## media              10765
##   10766.1886792453     1
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Box plot of the daily step totals.
plotly::plot_ly(y = newdata[["sum"]], type = "box")

Time series plot of the average number of steps taken

# Mean number of steps for each 5-minute interval, averaged over all days.
# `steps` is the data frame with the missing step counts already removed.
stepsinterval <- aggregate(steps ~ interval, data = steps, FUN = mean)

# Line plot of the interval means; axis labels match the column names.
plot(stepsinterval$interval, stepsinterval$steps, type = "l",
     xlab = "interval", ylab = "steps")

The 5-minute interval that, on average, contains the maximum number of steps

# Interval identifier whose mean step count is the largest; which.max()
# returns the position of the first maximum.
maxsteps <- stepsinterval$interval[which.max(stepsinterval$steps)]
maxsteps
## [1] 835

Code to describe and show a strategy for imputing missing data

Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs).

# Inspect the structure of the raw data: column names, types and first values.
str(activity)
## 'data.frame':    17568 obs. of  3 variables:
##  $ steps   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ date    : chr  "2012-10-01" "2012-10-01" "2012-10-01" "2012-10-01" ...
##  $ interval: int  0 5 10 15 20 25 30 35 40 45 ...
# Number of rows whose step count is missing. The value is printed so that it
# is actually reported, in the same way `maxsteps` is printed.
misingvalues <- sum(is.na(activity$steps))
misingvalues

Devise a strategy for filling in all of the missing values in the dataset. My strategy is to replace each missing value with the mean number of steps for the same 5-minute interval.

# Look up the mean number of steps for one interval.
#
# interval: an interval identifier as stored in stepsinterval$interval.
# Returns the `steps` value(s) of the rows of the global `stepsinterval` table
# whose interval equals `interval` (zero-length when there is no such row).
meanstepsinterval <- function(interval) {
    is_match <- stepsinterval$interval == interval
    stepsinterval$steps[is_match]
}

I build a dataset where the missing values are filled in with the mean for the corresponding interval

# Copy of the raw data in which every missing step count is replaced by the
# mean for the same 5-minute interval, taken from `stepsinterval`.
activitydata <- activity
missing_rows <- is.na(activitydata$steps)
if (any(missing_rows)) {
    # match() finds the row of `stepsinterval` for all missing rows at once,
    # replacing a row-by-row loop over the whole data frame.
    lookup <- match(activitydata$interval[missing_rows],
                    stepsinterval$interval)
    # Fail loudly rather than leave NAs behind when an interval has no mean.
    if (anyNA(lookup)) {
        stop("no interval mean available for some rows with missing steps",
             call. = FALSE)
    }
    activitydata$steps[missing_rows] <- stepsinterval$steps[lookup]
}

The new dataset doesn't have missing values.

Make a histogram of the total number of steps taken each day and Calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?

# Total steps per day, computed on the data with imputed values.
totalsteps <- aggregate(steps ~ date, data = activitydata, FUN = sum)
# The argument expression is left as-is because hist() derives its default
# title and x-axis label from it.
hist(totalsteps$steps)

# Mean and median of the daily totals after imputation. The column is spelled
# out in full (`steps`) instead of relying on partial matching of `$step`.
mediasteps <- mean(totalsteps$steps)
medianSsteps <- stats::median(totalsteps$steps)
# Print both values so they can be compared with the pre-imputation estimates.
mediasteps
medianSsteps

Are there differences in activity patterns between weekdays and weekends?

Create a new factor variable in the dataset with two levels – “weekday” and “weekend” indicating whether a given date is a weekday or weekend day.

# Parse the date strings into Date objects.
activitydata$date <- as.Date(activitydata$date, format = "%Y-%m-%d")

# Classify each date as weekend or weekday. The numeric day of the week
# (POSIXlt `wday`: 0 = Sunday, 6 = Saturday) is used instead of the names from
# weekdays(), which depend on the session locale and would not equal
# "Saturday"/"Sunday" outside an English locale.
is_weekend <- as.POSIXlt(activitydata$date)$wday %in% c(0L, 6L)

# Two-level factor, as the assignment asks for; computed for all rows at once.
activitydata$day <- factor(
    ifelse(is_weekend, "weekend", "weekday"),
    levels = c("weekday", "weekend")
)

# Mean steps per interval, separately for weekdays and weekends. The result
# has the columns interval, day and steps, in that order.
stepsday <- aggregate(steps ~ interval + day, data = activitydata, FUN = mean)

I made a panel plot comparing the average activity pattern on weekdays and weekends

# Give the aggregated columns the short names used in the plotting formula.
colnames(stepsday) <- c("interval", "day", "steps")
library(lattice)
## Warning: package 'lattice' was built under R version 4.1.2
# One line panel per value of `day`, arranged in 1 column and 2 rows.
xyplot(
    x = steps ~ interval | day,
    data = stepsday,
    type = "l",
    layout = c(1, 2),
    xlab = "Interval",
    ylab = "Number of steps"
)